Column

Number of items ordered in the seven most-ordered departments

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(janitor)
library(stringr)
library(forcats)
library(viridis)
## Loading required package: viridisLite
library(ggridges)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(shiny)

# Load the Instacart training data (zipped CSV) used by every panel below.
instacart <- read_csv("../data/instacart_train_data.csv.zip")
## Parsed with column specification:
## cols(
##   order_id = col_integer(),
##   product_id = col_integer(),
##   add_to_cart_order = col_integer(),
##   reordered = col_integer(),
##   user_id = col_integer(),
##   eval_set = col_character(),
##   order_number = col_integer(),
##   order_dow = col_integer(),
##   order_hour_of_day = col_integer(),
##   days_since_prior_order = col_integer(),
##   product_name = col_character(),
##   aisle_id = col_integer(),
##   department_id = col_integer(),
##   aisle = col_character(),
##   department = col_character()
## )
# Interactive bar chart of the seven departments with the most items ordered
# (ties at the cutoff are kept), with bars ordered by item count.
instacart %>%
  count(department) %>%
  rename(items_ordered = n) %>%
  mutate(depart_rank = min_rank(desc(items_ordered))) %>%
  filter(depart_rank <= 7) %>%
  arrange(depart_rank) %>%
  # Reorder the factor levels so the x axis follows the item counts.
  mutate(department = fct_reorder(department, items_ordered)) %>%
  plot_ly(x = ~department, y = ~items_ordered, type = "bar")

Column

Order hours for items from each department

# Violin plot of the hour of day at which items are ordered, one violin per
# department; departments are ordered by the IQR of their order hours.
order_hour <- ggplot(
  data = mutate(
    instacart,
    department = fct_reorder(department, order_hour_of_day, IQR)
  ),
  mapping = aes(x = department, y = order_hour_of_day, fill = department)
) +
  geom_violin(bw = 0.6) +
  # Rotate the department labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90))

# Render the static ggplot as an interactive plotly widget.
ggplotly(order_hour)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Mean order hour of each product, by day of the week

# Distribution, across products, of the mean hour of day at which each product
# is ordered, drawn as one boxplot per day of the week (order_dow, 0-6).
#
# The per-product, per-day means are plotted directly. Reshaping them wide and
# back to long would add an NA row for every product/day combination that never
# occurred, which geom_boxplot() then has to drop with a "non-finite values"
# warning; the boxes themselves are the same either way.
mean_hour <-
  instacart %>%
  group_by(product_name, order_dow) %>%
  summarize(mean_hour = mean(order_hour_of_day)) %>%
  # summarize() only peels off the last grouping level; clear the rest.
  ungroup() %>%
  # A discrete x is required to get one box (and one colour) per day.
  mutate(day = factor(order_dow)) %>%
  ggplot(aes(x = day, y = mean_hour, color = day)) +
  geom_boxplot()

# Render the static ggplot as an interactive plotly widget.
ggplotly(mean_hour)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 119551 rows containing non-finite values (stat_boxplot).